pm566-assignment1

Author

Stella Zhou

1. Import datasets

# Load the 2002 EPA export and report its row and column counts.
epa02 <- read.csv(file = "EPA2002.csv")
dim(epa02)
[1] 15976    20
str(epa02)
'data.frame':   15976 obs. of  20 variables:
 $ Date                          : chr  "01/05/2002" "01/06/2002" "01/08/2002" "01/11/2002" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site.ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Daily.Mean.PM2.5.Concentration: num  25.1 31.6 21.4 25.9 34.5 41 29.3 15 18.8 37.9 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  78 92 71 80 98 115 87 57 65 107 ...
 $ Site.Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
summary(epa02)
     Date              Source             Site.ID              POC       
 Length:15976       Length:15976       Min.   :60010007   Min.   :1.000  
 Class :character   Class :character   1st Qu.:60290014   1st Qu.:1.000  
 Mode  :character   Mode  :character   Median :60590007   Median :1.000  
                                       Mean   :60549600   Mean   :1.581  
                                       3rd Qu.:60731002   3rd Qu.:1.000  
                                       Max.   :61131003   Max.   :6.000  
                                                                         
 Daily.Mean.PM2.5.Concentration    UNITS           DAILY_AQI_VALUE 
 Min.   :  0.00                 Length:15976       Min.   :  0.00  
 1st Qu.:  7.00                 Class :character   1st Qu.: 29.00  
 Median : 12.00                 Mode  :character   Median : 50.00  
 Mean   : 16.12                                    Mean   : 53.68  
 3rd Qu.: 20.50                                    3rd Qu.: 69.00  
 Max.   :104.30                                    Max.   :176.00  
                                                                   
  Site.Name         DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
 Length:15976       Min.   :1       Min.   :100      Min.   :88101     
 Class :character   1st Qu.:1       1st Qu.:100      1st Qu.:88101     
 Mode  :character   Median :1       Median :100      Median :88101     
                    Mean   :1       Mean   :100      Mean   :88215     
                    3rd Qu.:1       3rd Qu.:100      3rd Qu.:88502     
                    Max.   :1       Max.   :100      Max.   :88502     
                                                                       
 AQS_PARAMETER_DESC   CBSA_CODE      CBSA_NAME           STATE_CODE
 Length:15976       Min.   :12540   Length:15976       Min.   :6   
 Class :character   1st Qu.:23420   Class :character   1st Qu.:6   
 Mode  :character   Median :40140   Mode  :character   Median :6   
                    Mean   :33270                      Mean   :6   
                    3rd Qu.:41740                      3rd Qu.:6   
                    Max.   :49700                      Max.   :6   
                    NA's   :929                                    
    STATE            COUNTY_CODE        COUNTY          SITE_LATITUDE  
 Length:15976       Min.   :  1.00   Length:15976       Min.   :32.63  
 Class :character   1st Qu.: 29.00   Class :character   1st Qu.:34.07  
 Mode  :character   Median : 59.00   Mode  :character   Median :35.36  
                    Mean   : 54.78                      Mean   :36.00  
                    3rd Qu.: 73.00                      3rd Qu.:37.77  
                    Max.   :113.00                      Max.   :41.71  
                                                                       
 SITE_LONGITUDE  
 Min.   :-124.2  
 1st Qu.:-121.4  
 Median :-119.1  
 Mean   :-119.4  
 3rd Qu.:-117.9  
 Max.   :-115.5  
                 
# Load the 2022 EPA export and report its row and column counts.
epa22 <- read.csv(file = "EPA2022.csv")
dim(epa22)
[1] 57775    20
str(epa22)
'data.frame':   57775 obs. of  20 variables:
 $ Date                          : chr  "01/01/2022" "01/02/2022" "01/03/2022" "01/04/2022" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site.ID                       : int  60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 60010007 ...
 $ POC                           : int  3 3 3 3 3 3 3 3 3 3 ...
 $ Daily.Mean.PM2.5.Concentration: num  12.7 13.9 7.1 3.7 4.2 3.8 2.3 6.9 13.6 11.2 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  52 55 30 15 18 16 10 29 54 47 ...
 $ Site.Name                     : chr  "Livermore" "Livermore" "Livermore" "Livermore" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88101 88101 88101 88101 88101 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" ...
 $ CBSA_CODE                     : int  41860 41860 41860 41860 41860 41860 41860 41860 41860 41860 ...
 $ CBSA_NAME                     : chr  "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" "San Francisco-Oakland-Hayward, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ COUNTY                        : chr  "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ SITE_LATITUDE                 : num  37.7 37.7 37.7 37.7 37.7 ...
 $ SITE_LONGITUDE                : num  -122 -122 -122 -122 -122 ...
summary(epa22)
     Date              Source             Site.ID              POC        
 Length:57775       Length:57775       Min.   :60010007   Min.   : 1.000  
 Class :character   Class :character   1st Qu.:60311004   1st Qu.: 1.000  
 Mode  :character   Mode  :character   Median :60631007   Median : 3.000  
                                       Mean   :60571692   Mean   : 2.531  
                                       3rd Qu.:60771003   3rd Qu.: 3.000  
                                       Max.   :61131003   Max.   :21.000  
                                                                          
 Daily.Mean.PM2.5.Concentration    UNITS           DAILY_AQI_VALUE 
 Min.   : -2.200                Length:57775       Min.   :  0.00  
 1st Qu.:  4.200                Class :character   1st Qu.: 18.00  
 Median :  7.000                Mode  :character   Median : 29.00  
 Mean   :  8.574                                   Mean   : 32.95  
 3rd Qu.: 10.900                                   3rd Qu.: 45.00  
 Max.   :302.500                                   Max.   :353.00  
                                                                   
  Site.Name         DAILY_OBS_COUNT PERCENT_COMPLETE AQS_PARAMETER_CODE
 Length:57775       Min.   :1       Min.   :100      Min.   :88101     
 Class :character   1st Qu.:1       1st Qu.:100      1st Qu.:88101     
 Mode  :character   Median :1       Median :100      Median :88101     
                    Mean   :1       Mean   :100      Mean   :88196     
                    3rd Qu.:1       3rd Qu.:100      3rd Qu.:88101     
                    Max.   :1       Max.   :100      Max.   :88502     
                                                                       
 AQS_PARAMETER_DESC   CBSA_CODE      CBSA_NAME           STATE_CODE
 Length:57775       Min.   :12540   Length:57775       Min.   :6   
 Class :character   1st Qu.:31080   Class :character   1st Qu.:6   
 Mode  :character   Median :40140   Mode  :character   Median :6   
                    Mean   :35447                      Mean   :6   
                    3rd Qu.:41860                      3rd Qu.:6   
                    Max.   :49700                      Max.   :6   
                    NA's   :4761                                   
    STATE            COUNTY_CODE        COUNTY          SITE_LATITUDE  
 Length:57775       Min.   :  1.00   Length:57775       Min.   :32.58  
 Class :character   1st Qu.: 31.00   Class :character   1st Qu.:34.14  
 Mode  :character   Median : 63.00   Mode  :character   Median :36.60  
                    Mean   : 57.02                      Mean   :36.37  
                    3rd Qu.: 77.00                      3rd Qu.:38.10  
                    Max.   :113.00                      Max.   :41.76  
                                                                       
 SITE_LONGITUDE  
 Min.   :-124.2  
 1st Qu.:-121.5  
 Median :-119.8  
 Mean   :-119.7  
 3rd Qu.:-118.1  
 Max.   :-115.5  
                 

Daily PM2.5

summary(epa02$Daily.Mean.PM2.5.Concentration)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    7.00   12.00   16.12   20.50  104.30 
summary(epa22$Daily.Mean.PM2.5.Concentration)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -2.200   4.200   7.000   8.574  10.900 302.500 
sum(is.na(epa02$Daily.Mean.PM2.5.Concentration))
[1] 0
sum(is.na(epa22$Daily.Mean.PM2.5.Concentration))
[1] 0

There are a total of 20 variables in each year’s EPA summary. The summary of Daily.Mean.PM2.5.Concentration shows that there are no missing values for 2002 or 2022; however, there is a negative minimum value for 2022, suggesting that we need to take a closer look at the data for issues.

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.4
✔ ggplot2   3.4.3     ✔ stringr   1.5.0
✔ lubridate 1.9.2     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)

Attaching package: 'data.table'

The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year

The following object is masked from 'package:purrr':

    transpose

The following objects are masked from 'package:dplyr':

    between, first, last
# Keep only rows with a strictly positive daily PM2.5 reading, then re-check
# the distribution of the remaining values.
# NOTE(review): `> 0` drops exact-zero readings as well as negative ones --
# confirm that zeros are meant to be excluded.
epa02 <- dplyr::filter(epa02, Daily.Mean.PM2.5.Concentration > 0)
summary(epa02[["Daily.Mean.PM2.5.Concentration"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.10    7.00   12.00   16.12   20.50  104.30 
# Keep only rows with a strictly positive daily PM2.5 reading, then re-check
# the distribution of the remaining values.
# NOTE(review): `> 0` drops exact-zero readings as well as negative ones --
# confirm that zeros are meant to be excluded.
epa22 <- dplyr::filter(epa22, Daily.Mean.PM2.5.Concentration > 0)
summary(epa22[["Daily.Mean.PM2.5.Concentration"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.100   4.200   7.000   8.626  10.900 302.500 

2. Combine the two years of data into one data frame

# Combine both years into one data frame. With no `by` argument, merge() joins
# on every column name the two inputs share; all = TRUE keeps the unmatched
# rows from both sides.
epa_merge <- merge(epa02, epa22, all = TRUE)

Transform Date from chr to date format, and then create a Year variable (numeric)

# Parse the month/day/year strings into Date values, derive a numeric Year
# column from the parsed dates, and inspect the resulting structure.
epa_merge[["Date"]] <- as.Date(epa_merge[["Date"]], format = "%m/%d/%Y")
epa_merge[["Year"]] <- as.numeric(format(epa_merge[["Date"]], "%Y"))
str(epa_merge)
'data.frame':   73413 obs. of  21 variables:
 $ Date                          : Date, format: "2002-01-01" "2002-01-01" ...
 $ Source                        : chr  "AQS" "AQS" "AQS" "AQS" ...
 $ Site.ID                       : int  60074001 60130002 60290014 60290014 60290014 60370002 60371103 60374002 60590007 60658001 ...
 $ POC                           : int  3 1 1 3 4 1 1 1 1 1 ...
 $ Daily.Mean.PM2.5.Concentration: num  10.6 20.9 26.1 30.3 31.1 32.3 39.6 47.1 47.9 66.3 ...
 $ UNITS                         : chr  "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" "ug/m3 LC" ...
 $ DAILY_AQI_VALUE               : int  44 70 80 89 91 93 111 130 132 157 ...
 $ Site.Name                     : chr  "TRAFFIC, RURAL PAVED ROAD" "Concord" "Bakersfield-California" "Bakersfield-California" ...
 $ DAILY_OBS_COUNT               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ PERCENT_COMPLETE              : num  100 100 100 100 100 100 100 100 100 100 ...
 $ AQS_PARAMETER_CODE            : int  88502 88101 88101 88502 88502 88101 88101 88101 88101 88101 ...
 $ AQS_PARAMETER_DESC            : chr  "Acceptable PM2.5 AQI & Speciation Mass" "PM2.5 - Local Conditions" "PM2.5 - Local Conditions" "Acceptable PM2.5 AQI & Speciation Mass" ...
 $ CBSA_CODE                     : int  17020 41860 12540 12540 12540 31080 31080 31080 31080 40140 ...
 $ CBSA_NAME                     : chr  "Chico, CA" "San Francisco-Oakland-Hayward, CA" "Bakersfield, CA" "Bakersfield, CA" ...
 $ STATE_CODE                    : int  6 6 6 6 6 6 6 6 6 6 ...
 $ STATE                         : chr  "California" "California" "California" "California" ...
 $ COUNTY_CODE                   : int  7 13 29 29 29 37 37 37 59 65 ...
 $ COUNTY                        : chr  "Butte" "Contra Costa" "Kern" "Kern" ...
 $ SITE_LATITUDE                 : num  39.3 37.9 35.4 35.4 35.4 ...
 $ SITE_LONGITUDE                : num  -122 -122 -119 -119 -119 ...
 $ Year                          : num  2002 2002 2002 2002 2002 ...

rename variables

# Give the analysis columns shorter names (new_name = old_name).
epa_merge <- epa_merge %>%
  rename(
    dailyPM2.5 = Daily.Mean.PM2.5.Concentration,
    dailyAQI = DAILY_AQI_VALUE,
    lat = SITE_LATITUDE,
    long = SITE_LONGITUDE
  )

3. Create map using leaflet()

library(leaflet)

# Plot one circle per monitoring location and year instead of one per daily
# measurement: the repeated rows share identical coordinates, so they only add
# tens of thousands of fully overlapping circles to the widget.
epa_sites <- epa_merge %>%
  distinct(Year, lat, long) %>%
  arrange(Year) # draw 2002 first so 2022 circles are always layered on top

# Palette creation: one colour per year (2002 = skyblue, 2022 = slateblue).
temp.pal <- colorFactor(c("skyblue", "slateblue"), domain = epa_sites$Year)

leaflet(epa_sites) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircles(
    lat = ~lat, lng = ~long, color = ~temp.pal(Year),
    opacity = 1, fillOpacity = 1, radius = 100
  ) %>%
  addLegend(
    "bottomleft",
    pal = temp.pal, values = ~Year,
    title = "Year", opacity = 1
  )

There are more stations in 2022 than in 2002, and the new sites arose especially in densely populated areas such as San Francisco, Los Angeles, and San Diego. This is important because densely populated areas are usually more polluted due to transportation; hence, monitoring PM2.5 in such areas helps environmentalists understand patterns and advocate for environmentally friendly policies.

4. check for missing pm2.5 value and summarize pattern

sum(is.na(epa_merge$dailyPM2.5))
[1] 0
# Sort ascending (by reference) to surface the lowest daily PM2.5 readings.
setorderv(epa_merge, cols = "dailyPM2.5", order = 1L)
select(epa_merge, Date, dailyPM2.5, dailyAQI, Site.Name)
# (printed data frame omitted from the report because it is too large)

# Sort descending (by reference) to surface the highest daily PM2.5 readings.
setorderv(epa_merge, cols = "dailyPM2.5", order = -1L)
select(epa_merge, Date, dailyPM2.5, dailyAQI, Site.Name)
# (printed data frame omitted from the report because it is too large)

There does not appear to be a pattern for the best air quality dates in terms of location; however, the lowest daily PM2.5 values appeared in mid- and late January of 2002 and 2022. The dates with abnormally high daily PM2.5 values corresponded to the wildfire dates in late summer 2022 in California. For example, the highest daily PM2.5 concentration of 302.5 ug/m^3 LC was recorded in Yreka on July 31st, 2022, corresponding to the McKinney fire on July 29th, 2022. The sites with the next-highest values also coincided with other 2022 wildfires, such as the Mosquito fire in September and October.

5. Explore the main question of interest at three different spatial levels: state, county, sites in LA

State: Histogram

# Year as a factor with 2022 as the first level; used as the histogram fill.
library(ggplot2)
epa_merge$Year1 <- relevel(factor(epa_merge$Year), ref = "2022")

# Overlaid, semi-transparent histograms of daily PM2.5 for the two years,
# with the x axis limited to 0-100.
ggplot(data = epa_merge, mapping = aes(x = dailyPM2.5, fill = Year1)) +
  geom_histogram(
    bins = 100, position = "identity",
    colour = "black", alpha = 0.5
  ) +
  xlim(0, 100) +
  labs(
    title = "Distribution of sites by Daily PM2.5 Concentration in 2002 and 2022",
    x = "Daily PM2.5 Concentration",
    y = "Count"
  )
Warning: Removed 39 rows containing non-finite values (`stat_bin()`).
Warning: Removed 4 rows containing missing values (`geom_bar()`).

# Side-by-side (dodged) histogram of daily PM2.5 by year, with the x axis
# limited to 35-310 so only the high readings are binned.
ggplot(data = epa_merge, mapping = aes(x = dailyPM2.5, fill = Year1)) +
  geom_histogram(position = "dodge", bins = 100) +
  xlim(35, 310) +
  labs(
    title = "Distribution of sites that reported unhealthy Daily PM2.5 Concentration in 2002 and 2022",
    x = "Daily PM2.5 Concentration",
    y = "Count"
  )
Warning: Removed 71334 rows containing non-finite values (`stat_bin()`).
Warning: Removed 3 rows containing missing values (`geom_bar()`).

# State-level summary of daily PM2.5 for each year. na.rm = TRUE is passed to
# every statistic so that all columns treat missing values the same way
# (previously sd, min and max would have returned NA if any value was missing).
epa_merge %>%
  group_by(Year) %>%
  summarise(
    mean = mean(dailyPM2.5, na.rm = TRUE),
    median = median(dailyPM2.5, na.rm = TRUE),
    sd = sd(dailyPM2.5, na.rm = TRUE),
    min = min(dailyPM2.5, na.rm = TRUE),
    max = max(dailyPM2.5, na.rm = TRUE),
    IQR = IQR(dailyPM2.5, na.rm = TRUE)
  )
# A tibble: 2 × 7
   Year  mean median    sd   min   max   IQR
  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1  2002 16.1      12 13.9    0.1  104.  13.5
2  2022  8.63      7  7.84   0.1  302.   6.7

California reported more measurements in 2022 because more sites have been built over the 20 years.

The histogram shows a positive skew of PM2.5 concentration for both years.

According to the summary statistics, the average daily PM2.5 concentration was higher in 2002 (16.12 ug/m^3 LC) than in 2022 (8.63 ug/m^3 LC), as were the median, standard deviation, and IQR. The maximum measurement, as mentioned in the previous question, was 302.5 ug/m^3 LC in 2022.

According to the EPA, the National Ambient Air Quality Standards (NAAQS) for PM2.5 consider a 24-hour PM2.5 concentration of 35 ug/m^3 LC and above to be unhealthy for sensitive groups. Therefore, to understand the change in unhealthy air quality over the 20 years, a second histogram is created to show the distribution of PM2.5 concentrations of 35 ug/m^3 LC and above in California. The graph shows that there are fewer sites that reported unhealthy air quality days in 2022 compared to 2002. In summary, the overall air quality has improved over the 20 years.

County

# Daily PM2.5 by county, coloured by year. Colours are keyed by year level so
# that 2002 is sky blue and 2022 is slate blue, the same assignment used by the
# leaflet map and the Los Angeles site plot (they were reversed here before).
ggplot(epa_merge) +
  geom_point(mapping = aes(x = COUNTY, y = dailyPM2.5, colour = factor(Year))) +
  scale_color_manual(values = c("2002" = "skyblue", "2022" = "slateblue")) +
  labs(
    x = "County",
    y = "Daily PM2.5 Concentration (ug/m^3 LC)",
    colour = "Year" # replaces the default "factor(Year)" legend title
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = .5, size = 5))

# County-by-year summary of daily PM2.5. na.rm = TRUE is passed to every
# statistic so that all columns treat missing values the same way
# (previously sd, min and max would have returned NA if any value was missing).
epa_merge %>%
  group_by(COUNTY, Year) %>%
  summarise(
    mean = mean(dailyPM2.5, na.rm = TRUE),
    median = median(dailyPM2.5, na.rm = TRUE),
    sd = sd(dailyPM2.5, na.rm = TRUE),
    min = min(dailyPM2.5, na.rm = TRUE),
    max = max(dailyPM2.5, na.rm = TRUE),
    IQR = IQR(dailyPM2.5, na.rm = TRUE)
  )
`summarise()` has grouped output by 'COUNTY'. You can override using the
`.groups` argument.
# A tibble: 98 × 8
# Groups:   COUNTY [51]
   COUNTY        Year  mean median    sd   min   max   IQR
   <chr>        <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Alameda       2002 14.3    10   11.4    1.9  61.6 10.2 
 2 Alameda       2022  8.22    7    4.94   0.4  35.5  5.8 
 3 Butte         2002 14.8    11.5 11.7    1    88   10.5 
 4 Butte         2022  6.28    4.5  5.78   0.1  42.8  5.2 
 5 Calaveras     2002  9.9     8    6.50   2    40    6.25
 6 Calaveras     2022  6.05    5    4.09   0.5  25.9  4.18
 7 Colusa        2002 11.7     9   10.0    1    57    9.5 
 8 Colusa        2022  7.61    6.7  4.76   0.6  37    6.1 
 9 Contra Costa  2002 15.1     9.5 14.5    2    76.7 10.1 
10 Contra Costa  2022  8.22    7.2  4.93   0.3  37.3  5.6 
# ℹ 88 more rows

The highest daily PM2.5 concentrations appeared mostly in the counties that were heavily influenced by the wildfires in 2022, for example, Mariposa, Nevada, Placer, Riverside, Siskiyou, and Trinity counties.

LA County

# Restrict the combined data to Los Angeles County.
epa_LA <- filter(epa_merge, COUNTY == "Los Angeles")

# Daily PM2.5 at each Los Angeles site, coloured by year.
ggplot(data = epa_LA) +
  geom_point(
    mapping = aes(x = Site.Name, y = dailyPM2.5, colour = factor(Year))
  ) +
  scale_color_manual(values = c("skyblue", "slateblue")) +
  theme(axis.text.x = element_text(angle = 90, vjust = .5, size = 5)) +
  labs(x = "LA Site Name", y = "Daily PM2.5 Concentration (ug/m^3 LC)")

# Site-by-year summary of daily PM2.5 within Los Angeles County. na.rm = TRUE
# is passed to every statistic so that all columns treat missing values the
# same way (previously sd, min and max would have returned NA if any value
# was missing).
epa_LA %>%
  group_by(Site.Name, Year) %>%
  summarise(
    mean = mean(dailyPM2.5, na.rm = TRUE),
    median = median(dailyPM2.5, na.rm = TRUE),
    sd = sd(dailyPM2.5, na.rm = TRUE),
    min = min(dailyPM2.5, na.rm = TRUE),
    max = max(dailyPM2.5, na.rm = TRUE),
    IQR = IQR(dailyPM2.5, na.rm = TRUE)
  )
`summarise()` has grouped output by 'Site.Name'. You can override using the
`.groups` argument.
# A tibble: 25 × 8
# Groups:   Site.Name [18]
   Site.Name                    Year  mean median    sd   min   max   IQR
   <chr>                       <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
 1 ""                           2002 23.9   21.4  12.3    5.6  61   11.9 
 2 "Azusa"                      2002 20.8   18.7  12.1    3.1  72.4 15.6 
 3 "Azusa"                      2022  9.72   9.65  4.39   3.1  18.4  7.6 
 4 "Burbank"                    2002 24.0   21.6  12.7    3.5  63   13.9 
 5 "Compton"                    2022 13.0   11.9   6.22   2.6  54.6  5.9 
 6 "Glendora"                   2022  8.52   7.9   5.42   0.3  56    7.5 
 7 "Lancaster-Division Street"  2002 10.4   10     4.47   1    24    4   
 8 "Lancaster-Division Street"  2022  7.48   7.3   2.51   0.2  15.1  3.15
 9 "Lebec"                      2002  4.82   4.8   2.78   0.6  12.4  4   
10 "Lebec"                      2022  3.52   3.45  1.66   0.9   7.3  2   
# ℹ 15 more rows

We can see that the sites shown in both colors are sites that existed in both 2002 and 2022; among these, the 2022 mean daily PM2.5 concentration was lower than the 2002 mean. New sites that were present only in 2022 showed average daily PM2.5 concentrations lower than 13. Among the new sites, Compton, Long beach route 710-near road, and North Hollywood had the highest mean daily PM2.5 concentrations and most often produced the maximum values as well. This is possibly due to high population, high transportation activity, and geographical factors. Overall, there is an improvement in air quality in California over the last 20 years.